test
?RQDAQuery
query
RQDAQUery()
RQDAQuery()
library(pwr)
help(pwr)
help(pwr.f2.test)
pwr.f2.test(u = 6, f2 = 0.01/0.99, sig.level = 0.05, power = 0.8)
pwr.f2.test(u = 6, f2 = 0.015/0.985, sig.level = 0.05, power = 0.8)
pwr.f2.test(u = 6, f2 = 0.015/0.985, sig.level = 0.05, power = 0.9)
pwr.f2.test(u = 6, f2 = 0.0125/0.9875, sig.level = 0.05, power = 0.9)
a <- c(.5, .1, .9)
b <- c(.5, .2, .8)
c(0, .7, .4)
c <- c(0, .7, .4)
b <- c(.5, .2, .06)
c <- c(0, .7, .04)
help(ternary)
install.packages("installr")
require(installr)
updateR()
library(ggplot2)
install.packages("Hmisc")
install.packages("plotrix")
install.packages("RWinEdt")
install.packages("prettyR")
install.packages("xtable")
install.packages("ggplot2")
install.packages("plyr")
install.packages("sandwich")
install.packages("xlsx")
library(Zelig)
library(Zelig)
help(sink)
date()
library(Rqdta)
search.packages()
list.packages()
library(RQDA)
install.packages("RQDA")
library(RQDA)
letters()
letters
plot(1:26, 1:26, pch = "")
text(1:26, 1:26, labels = letters)
plot(1:26, 1:26, pch = "")
text(1:26, 1:26, labels = letters)
mydf <- as.data.frame(matrix(nrow = 26, ncol = 3))
names(mydf <- c("abbr", "myiv", "mydv")
mydf$abbr <- letters
mydf$myiv <- rnorm(26)
mydf$mydv <- mydf$myiv + rnorm(26, 1, .05)
mydf <- as.data.frame(matrix(nrow = 26, ncol = 3))
names(mydf <- c("abbr", "myiv", "mydv"))
mydf$abbr <- letters
mydf$myiv <- rnorm(26)
mydf$mydv <- mydf$myiv + rnorm(26, 1, .05)
mydf <- as.data.frame(matrix(nrow = 26, ncol = 3))
names(mydf) <- c("abbr", "myiv", "mydv")
mydf$abbr <- letters
mydf$myiv <- rnorm(26)
mydf$mydv <- mydf$myiv + rnorm(26, 1, .05)
mydf
plot(mydf$myiv, mydf$mydv, pch = "")
text(mydf$myiv, mydf$mydv, labels = mydf$abbr)
mydf <- as.data.frame(matrix(nrow = 26, ncol = 3))
names(mydf) <- c("abbr", "myiv", "mydv")
mydf$abbr <- letters
mydf$myiv <- rnorm(26)
mydf$mydv <- mydf$myiv + rnorm(26, 1, .5)
plot(mydf$myiv, mydf$mydv, pch = "")
text(mydf$myiv, mydf$mydv, labels = mydf$abbr)
library(foreign)
full <- read.dta("C:/users/michael/desktop/testingdata.dta")
corr(cbind(full$pppinc, full$impinc), w = full$weight)
library(boot)
corr(cbind(full$pppinc, full$impinc), w = full$weight)
nomiss <- subset(full, !is.na(full$pppinc))
corr(cbind(nomiss$pppinc, nomiss$impinc), w = nomiss$weight)
table(is.na(full$weight))
corr(d = cbind(nomiss$pppinc, nomiss$impinc), w = nomiss$weight)
nrow(nomiss)
library(weights)
install.packages("weights")
library(weights)
wtd.cors(full$pppinc, full$impinc, weight = full$weight)
by(full, by = full$simpleInc, function(x)wtd.cors(x$pppinc, x$impinc, weight = x$weight))
by(full, by = c(full$simpleInc), function(x)wtd.cors(x$pppinc, x$impinc, weight = x$weight))
is1 <- subset(full, simpleInc == 1)
wtd.cors(is1$pppinc, is1$impinc)
wtd.cors(is1$pppinc, is1$impinc, weight = is1$weight)
wtd.cors(is1$newinc, is1$impinc, weight = is1$weight)
wtd.cors(is1$newinc, is1$pppinc, weight = is1$weight)
wtd.cors(is1$newinc, is1$pppinc)
names(full)
full$testinc <- exp(full$loginc)
wtd.cors(is1$loginc, is1$pppinc)
is1 <- subset(full, simpleInc == 1)
wtd.cors(is1$testinc, is1$pppinc)
wtd.cors(is1$testinc, is1$impinc)
wtd.cors(is1$loginc, log(is1$pppinc)
)
wtd.cors(is1$loginc, log(is1$pppinc), weight = is1$weight
)
is1 <- subset(full, simpleInc == 2)
wtd.cors(is1$loginc, log(is1$pppinc), weight = is1$weight)
is1 <- subset(full, simpleInc == 10)
wtd.cors(is1$loginc, log(is1$pppinc), weight = is1$weight)
is1 <- subset(full, simpleInc == 9)
wtd.cors(is1$loginc, log(is1$pppinc), weight = is1$weight)
is1 <- subset(full, simpleInc == 1)
wtd.cors(is1$loginc, log(is1$impincinc), weight = is1$weight)
wtd.cors(is1$loginc, log(is1$impinc), weight = is1$weight)
wtd.cors(is1$loginc, log(is1$impinc)
)
wtd.cors(full$loginc, log(full$impinc))
wtd.cors(full$loginc, log(full$impinc), weight = full$weight)
wtd.cors(full$pppinc, full$impinc, weight = full$weight)
wtd.cors(log(full$pppinc), log(full$impinc), weight = full$weight)
wtd.cors(log(full$pppinc), log(full$impinc), weight = full$weight, na.rm = TRUE)
cor(full$pppinc, full$impinc)
cor(full$pppinc, full$impinc, use = "pariwise.complete.obs")
corr(full$pppinc, full$impinc, use = "pariwise.complete.obs")
help(cor)
#####################################################################################################
###### Replication file for "Income Measures in Cross-National Surveys: Problems and Solutions",
######      PSRM, conditionally accepted as of June 3, 2016
###### Authors: Michael J. Donnelly and Grigore Pop-Eleches
###### For questions about the code, please email Michael at michaeljamesdonnelly@gmail.com
#####################################################################################################
date()
rm(list = ls())                                           ## Start from a blank slate
## The script requires each of the following packages. If any are not installed on your machine,
## run the following, which is currently commented out:
##  install.packages("reldist")
##  install.packages("plotrix")
##  install.packages("lattice")
##  install.packages("ggplot2")
##  install.packages("foreign")
##  install.packages("questionr")
library(reldist)
library(plotrix)
library(lattice)
library(ggplot2)
library(foreign)
library(questionr)
## Set this path to the relevant directory
replpath <- "C:/users/michael/desktop/IncomeReplication"
## The necessary data are in the /ReplicationData folder or the replication package
setwd(paste(replpath, "/ReplicationData", sep = ""))
#####################################################################################################
###### Figure 1
#####################################################################################################
## This is the simplified main WVS file
full <- read.dta("WVSincomeReplicationData.dta")
## This contains macro data, including an estimate of houshold size
mcro <- subset(read.dta("macrovars2.dta"), select = c("conames", "year", "hhsize"))
names(mcro) <- c("conames", "year", "hsize")
full <- merge(full, mcro, by = c("conames", "year"), all.x = TRUE)
## Figure 1 has four country-waves. This selects those country-waves as separate files.
egy00 <- subset(full, conames == "Egypt" & year == 2000)
fin90 <- subset(full, conames == "Finland" & year == 1990)
bra91 <- subset(full, conames == "Brazil" & year == 1991)
nor96 <- subset(full, conames == "Norway" & year == 1996)
## For commentary about percentile of individuals in the 5th category
## This takes a weighted table and produces a cumulative weighted sum, then uses the midpoint of the
## fifth cagtegory as the estimate of the percentile for repsondents in that group
bratab <- wtd.table(x = bra91$simpleInc, weights = bra91$weight)    ## Weighted frequency table
brapercs <- bratab/sum(bratab)        ## turned into percentiles
sum(brapercs[1:4]) + (sum(brapercs[1:5]) - sum(brapercs[1:4]))/2    ## 0.886
fintab <- wtd.table(x = fin90$simpleInc, weights = fin90$weight)
finpercs <- fintab/sum(fintab)
sum(finpercs[1:4]) + (sum(finpercs[1:5]) - sum(finpercs[1:4]))/2    ## 0.135
one.wave.dats <- list(bra91, egy00, fin90, nor96)     ## puts those four into a list
laymat <- matrix(1:4, nrow = 2, byrow = TRUE)         ## the layout matrix (2x2 square)
## Return to the main folder
setwd(replpath)
png("Figure1.png")    ## open graphics device in png format
layout(laymat)
for (i in 1:4){
## This loops over the four data sets and plots a simple weighted histogram
thisdat <- subset(one.wave.dats[[i]], !is.na(simpleInc))
thisTitle <- paste(thisdat$conames[1], thisdat$year[1], sep = "\n")
## holder is a weighted histogram object that is not plotted yet
holder <- weighted.hist(thisdat$simpleInc,
w = thisdat$weight,
freq = FALSE,
breaks = c(seq(1, 11, by = 1)),
axisnames = FALSE,
plot = FALSE)
if (i == 1){par(mar = c(1, 4, 5, 0))}     ## Slightly different margins based on where in the
if (i == 2){par(mar = c(1, 1, 5, 1))}     ## square we are
if (i == 3){par(mar = c(3, 4, 2, 0))}
if (i == 4){par(mar = c(3, 1, 2, 1))}
## Now to plot them
barplot(holder$density,
axes = FALSE,
space = 0,
ylim = c(0, .3),
col = "gray90",
cex.lab = 1.5,
main = thisTitle,
xlim = c(0, 10))
if (i == 1){ ## These if statements determine whether to add axes
axis(side = 2,
at = seq(0, .3, by = .1),
labels = c("0", "10", "20", "30%"),
las = 2)
}
if (i == 2){ ## No axes on the top-right panel
}
if (i == 3){
axis(side = 2,
at = seq(0, .3, by = .1),
labels = c("0", "10", "20", "30%"),
las = 2)
axis(side = 1,
at = seq(.5, 9.5, by = 1),
labels = 1:10, cex.axis = .9)
}
if (i == 4){
axis(side = 1,
at = seq(.5, 9.5, by = 1),
labels = 1:10, cex.axis = .9)
}
##
abline(h = .1, lty = "dashed")
##
}
dev.off()
#####################################################################################################
###### Figure 2
#####################################################################################################
## These creates a simplified macro data set based on our imputed data
myginis <- by(full, list(full$abbr),
function(x)gini(subset(x, !is.na(newinc))$newinc,
weights = subset(x, !is.na(newinc))$weight)*100)
gsoltginis <- by(full, list(full$abbr),
function(x)mean(x$ggini, na.rm = TRUE))
nsoltginis <- by(full, list(full$abbr),
function(x)mean(x$ngini, na.rm = TRUE))
pppGDPs <- by(full, list(full$abbr),
function(x)weighted.mean(x$pppid*(100/x$cpiIndex),
weights = x$weight,
na.rm = TRUE))
pppEST <- by(full, list(full$abbr),
function(x)weighted.mean(x$pppinc/mean(x$hsize, na.rm = TRUE),
weights = x$weight,
na.rm = TRUE))
GDPs <- by(full, list(full$abbr),
function(x)weighted.mean(x$usd*(100/x$cpiIndex),
weights = x$weight,
na.rm = TRUE))
EST <- by(full, list(full$abbr),
function(x)weighted.mean(x$newinc/mean(x$hsize, na.rm = TRUE),
weights = x$weight,
na.rm = TRUE))
## This sticks all of those aggregated measures into a single data frame
mydat <- data.frame(matrix(NA, nrow = length(EST), ncol = 7))
names(mydat) <- c("abbr", "EST", "GDP", "pppEST", "pppGDP",
"mygini", "soltgini")
mydat$abbr <- dimnames(EST)[[1]]
mydat$pppGDP <- pppGDPs
mydat$GDP <- GDPs
mydat$pppEST <- pppEST
mydat$EST <- EST
mydat$mygini <- myginis
mydat$mygini <- ifelse(mydat$mygini == 0, NA, mydat$mygini)   ## the gini function spits out 0 for
## missing data
mydat$gsoltgini <- gsoltginis
mydat$nsoltgini <- nsoltginis
axpts <- seq(0, 60000, by = 10000)
axlabs <- c("0", "$10,000", "$20,000",
"$30,000", "$40,000", "$50,000",
"$60,000")
## A function taken from
## https://stat.ethz.ch/pipermail/r-help/2011-October/291396.html
## This simply formats the R squared
r2format <- function(object, digits = 3, output, sub, expression = TRUE, ...) {
if (inherits(object, "lm")) {
x <- summary(object)
} else if (inherits(object, "summary.lm")) {
x <- object
} else stop("object is an unmanageable class")
out <- format(x$r.squared, digits = digits)
if (!missing(output)) {
output <- gsub(sub, out, output)
} else {
output <- out
}
if (expression) {
output <- parse(text = output)
}
return(output)
}
## Now for the actual plot, which shows the relationship between estimated and
png("Figure2.png", width = 6, height = 4, units = "in", res = 960)
m1 <- lm(mydat$pppEST ~ mydat$pppGDP)    ## Simple OLS model for comparison
plot(x = 0, y = 0, pch = "",             ## plots nothing, but sets up the plot window
axes = FALSE,
ylim = c(0, 30000),
xlim = c(0, 60000),
xlab = "World Bank GDP per capita (PPP)",
ylab = "World Values personal income per capita (PPP)")
text(x = mydat$pppGDP, y = mydat$pppEST, labels = mydat$abbr, cex = .4)  ## puts the abbreviations
## in as points
axis(side = 1, at = axpts, labels = axlabs, cex.axis = .5)
axis(side = 2, at = axpts, labels = axlabs, cex.axis = .5, las = 2)
abline(m1, lty = "dashed")                                               ## ols line
text(52000, 22000, r2format(m1, 2, "R^2 == rval", "rval"))
abline(a = 0, b = 1)                                                     ## 45 degree line
dev.off()
#####################################################################################################
###### Figure 2
#####################################################################################################
setwd(paste(replpath, "/ReplicationData/", sep = ""))
## The next four sections pull out a single country-wave from another cross-national survey to show
## examples of realized income distributions
cses3 <- read.dta("FRA07csesIncome.dta")              ## Comparative Study of Electoral Systems
## France 2007 income and weight data
cses3$simpleInc <- match(cses3$C2020, c("1. LOWEST HOUSEHOLD INCOME QUINTILE",
"2. SECOND HOUSEHOLD INCOME QUINTILE",
"3. THIRD HOUSEHOLD INCOME QUINTILE",
"4. FOURTH HOUSEHOLD INCOME QUINTILE",
"5. HIGHEST HOUSEHOLD INCOME QUINTILE"))
fra07 <- subset(cses3, C1004 == "FRA_2007" & !is.na(cses3$simpleInc) &
!is.na(cses3$C1010_2))      ## removing missings because
## weighted.hist() doesn't
## work with missing data
ISSPsi <- read.dta("ISSPsiIncome.dta")                ## ISSP Social Inequality Module
conames <- read.csv("isspsicountries.csv")            ## For matching country names
ISSPsi <- merge(ISSPsi, conames, by = "country")
ISSPsi$country <- ISSPsi$newcountry
ISSPsi$cwv <- paste(ISSPsi$country, ISSPsi$year)
De99 <- subset(ISSPsi, cwv == "Germany 1999" & !is.na(income) &
!is.na(weight))
ess <- read.dta("IrelandESSIncome6.dta")              ## European Social Survey
## Ireland Round 6 income and weight data
ess$cwv <- paste(ess$cntry, ess$essround)
ess$simpleInc <- match(ess$inccode, c("J", "R", "C", "M", "F", "S",
"K", "P", "D", "H", "U", "N"))  ## Income codes, turns "NA"
## into NA
ire6 <- subset(ess, !is.na(ess$simpleInc) & !is.na(ess$dweight))
Lat03 <- read.dta("Latvia2003.dta")                   ## Cand. Count. Eurobarometer
## Latvia, 2003 income and weight data
Lat03$simpleInc <- match(Lat03$d12, c("1st (lowest) income decile",
"2nd income decile",
"3rd income decile",
"4th income decile",
"5th income decile",
"6th income decile",
"7th income decile",
"8th income decile",
"9th income decile",
"10th (highest) income decile"))
Lat03 <- subset(Lat03, !is.na(simpleInc) & !is.na(weight1))
## Creates four weighted.hist() objects without plotting them
holder1 <- weighted.hist(ire6$simpleInc,
w = ire6$dweight,
freq = FALSE,
breaks = c(seq(1, 11, by = 1)),
axisnames = FALSE,
plot = FALSE)
holder2 <- weighted.hist(fra07$simpleInc,
w = fra07$C1010_2,
freq = FALSE,
breaks = c(seq(0.5, 5.5, by = 1)),
axisnames = FALSE,
plot = FALSE)
holder3 <- weighted.hist(De99$income,
w = De99$weight,
freq = FALSE,
breaks = c(seq(1, 11, by = 1)),
axisnames = FALSE,
plot = FALSE)
holder4 <- weighted.hist(Lat03$simpleInc,
w = Lat03$weight,
freq = FALSE,
breaks = c(seq(1, 11, by = 1)),
axisnames = FALSE,
plot = FALSE)
## Now for the plotting
setwd(replpath)
png("Figure3.png", width = 540, height = 360)
layout(matrix(1:4, nrow = 2, ncol = 2)) ## square layout
barplot(holder1$density,
axes = FALSE,
space = 0,
ylim = c(0, .2),
col = "gray90",
cex.lab = 1.5,
main = "Irish Income - ESS 2012/13",
xlim = c(0, 10))
axis(side = 2,
at = seq(0, .2, by = .1),
labels = c("0", "10", "20%"),
las = 2)
axis(side = 1,
at = seq(.5, 9.5, by = 1),
labels = 1:10, cex.axis = .9)
barplot(holder2$density,
axes = FALSE,
space = 0,
ylim = c(0, .35),
col = "gray90",
cex.lab = 1.5,
main = "French Income - CSES 2007",
xlim = c(0, 5))
axis(side = 2,
at = seq(0, .3, by = .1),
labels = c("0", "10", "20", "30%"),
las = 2)
axis(side = 1,
at = seq(.5, 4.5, by = 1),
labels = 1:5, cex.axis = .9)
barplot(holder3$density,
axes = FALSE,
space = 0,
ylim = c(0, .2),
col = "gray90",
cex.lab = 1.5,
main = "German Income - ISSP 1999",
xlim = c(0, 10))
axis(side = 2,
at = seq(0, .2, by = .1),
labels = c("0", "10", "20%"),
las = 2)
axis(side = 1,
at = seq(.5, 9.5, by = 1),
labels = 1:10, cex.axis = .9)
barplot(holder4$density,
axes = FALSE,
space = 0,
ylim = c(0, .3),
col = "gray90",
cex.lab = 1.5,
main = "Latvian Income - CCEB 2003",
xlim = c(0, 10))
axis(side = 2,
at = seq(0, .3, by = .1),
labels = c("0", "10", "20", "30%"),
las = 2)
axis(side = 1,
at = seq(.5, 9.5, by = 1),
labels = 1:10, cex.axis = .9)
dev.off()
#####################################################################################################
###### Individual-level correlations between income derived from bands and imputed income
#####################################################################################################
full$originc <- exp(full$loginc)
cor(x = full$originc, y = full$impinc, use = "pairwise.complete.obs") # 0.918
by(full, full$simpleInc, function(dat){cor(x = dat$originc, y = dat$impinc,
use = "pairwise.complete.obs")})
## Numbers range from 0.82 to 0.89
full$cwv <- paste(full$conames, full$wv)
cwvcors <- unlist(by(full, full$cwv, function(dat){cor(x = dat$originc, y = dat$impinc,
use = "pairwise.complete.obs")}))
subset(cwvcors, cwvcors < .9) ## Only Malta 1
#####################################################################################################
###### Appendix B
#####################################################################################################
nr <- 5        ## number of rows and columns per page in the appendix
nc <- 3
fullnomiss <- subset(full, !is.na(simpleInc))
## The next few names are too long. This abbreviates them
fullnomiss$conames <- ifelse(fullnomiss$conames == "Bosnia and Herzegovina",
"Bosnia", as.character(fullnomiss$conames))
fullnomiss$conames <- ifelse(fullnomiss$conames == "Czech Republic",
"Czech R.", as.character(fullnomiss$conames))
fullnomiss$conames <- ifelse(fullnomiss$conames == "Dominican Republic",
"Dominican R.", as.character(fullnomiss$conames))
fullnomiss$conames <- ifelse(fullnomiss$conames == "Northern Ireland",
"Nor. Ireland", as.character(fullnomiss$conames))
fullnomiss$conames <- ifelse(fullnomiss$conames == "SwitzerlandSubinc",
"Switz. (Subj.)", as.character(fullnomiss$conames))
# Fixing the year label in waves that straddled two years
fullnomiss$wyr <- ifelse(fullnomiss$conames == "Slovakia" & fullnomiss$wv == 2, "1990/91",
ifelse(fullnomiss$conames == "Spain" & fullnomiss$wv == 4, "1999/2000",
ifelse(fullnomiss$conames == "Poland" & fullnomiss$wv == 3, "1989/90",
ifelse(fullnomiss$conames == "Colombia" & fullnomiss$wv == 2, "1997/98",
ifelse(fullnomiss$conames == "Czech Republic" & fullnomiss$wv == 2,
"1990/91", as.character(fullnomiss$year))))))
fullnomiss$CWave <- paste(fullnomiss$conames, fullnomiss$wyr, sep = " ")
CWVS <- unique(fullnomiss$CWave)           ## Total number of country-waves
pgs <- ceiling(length(CWVS)/(nr*nc))       ## Number of pages necessary to include all country-waves
setwd(paste(replpath, "/AppendixB/", sep = ""))
for (i in 1:pgs){
thisblock <- ((i - 1)*nr*nc + 1):(i*nr*nc)
thesesvys <- sort(CWVS)[thisblock]                  ## Selects out the surveys to be plotted on
## this page
home <- subset(fullnomiss, !is.na(match(CWave, thesesvys)))
pname <- paste("AppBpage", i, ".pdf", sep = "")     ## Name of the pdf file
pdf(pname)
pt <- qplot(x = simpleInc, ..density..,         ## Standard ggplot2 histogram
data=home, geom="histogram",
weight=weight, binwidth = 1,
boundary = .5) +
xlab("") +
ylab("") + theme_bw() +
scale_x_continuous(breaks = 1:10)+
theme(axis.title.y = element_text(size = 15, angle = 90)) +
theme(strip.text.x = element_text(size = 15)) +
ylim(0, .5) +
facet_wrap(~CWave, ncol = nc, nrow = nr)
print(pt)
dev.off()
print(i)                                            ## for monitoring progress
}
## Finished
setwd(replpath)
date()
